module modular_db.sql_preprocessor;

// This file contains a simplified version of SQLite's lexer, patched for our needs.
// The original one can be viewed here:
// https://www.sqlite.org/src/artifact?ci=trunk&filename=src/tokenize.c
// Known bugs:
// * Tcl-style parameters can be parsed incorrectly: https://www.sqlite.org/lang_expr.html#varparam

import std.algorithm.comparison: among;
import std.array: Appender;
import std.typecons: Tuple;
import std.utf: byCodeUnit;

private pure @safe:

// Code-unit (byte-wise) view over a `const(char)[ ]`; all scanning below works on this range type.
alias _ByteString = typeof("".byCodeUnit!(const(char)[ ]));

// Bit flags selecting which rewrites `preprocessSql` performs.
// `quoteLowercaseIdents` and `quoteUppercaseIdents` are mutually exclusive
// (enforced by a `static assert` in `preprocessSql`).
public enum SqlPreprocessorOptions {
    none,
    quoteLowercaseIdents = 0x1,
    quoteUppercaseIdents = 0x2,
    dedent = 0x4,
    stripComments = 0x8,
}

// Only '\n' counts as a line break; '\r' is classified as ordinary whitespace by `_isSpace`.
bool _isLineBreak(char c) nothrow @nogc {
    return c == '\n';
}

// Horizontal whitespace (everything whitespace-like except '\n').
bool _isSpace(char c) nothrow @nogc {
    return !!c.among!(' ', '\t', '\r', '\f');
}

// The character-class checks below compare a (possibly negative) `int` difference against an
// unsigned bound, so a single comparison covers both ends of the range.
bool _isDigit(char c) nothrow @nogc {
    return c - '0' < 10u;
}

bool _isLower(char c) nothrow @nogc {
    return c - 'a' < 26u;
}

bool _isUpper(char c) nothrow @nogc {
    return c - 'A' < 26u;
}

// Start of an identifier-like token that does NOT begin with a lowercase ASCII letter:
// an uppercase letter, one of `_ : @ $ #`, or a byte with the high bit set.
bool _isIdentStartNotLower(char c) nothrow @nogc {
    return c - 'A' < 26u || c.among!('_', ':', '@', '$', '#') || c & 0x80;
}

// Start of an identifier-like token that does NOT begin with an uppercase ASCII letter:
// a lowercase letter, one of `_ : @ $ #`, or a byte with the high bit set.
bool _isIdentStartNotUpper(char c) nothrow @nogc {
    return c - 'a' < 26u || c.among!('_', ':', '@', '$', '#') || c & 0x80;
}

// Identifier continuation: ASCII letter (either case), digit, '_', '$', or a high-bit byte.
bool _isIdent(char c) nothrow @nogc {
    return (c | 0x20) - 'a' < 26u || c - '0' < 10u || c == '_' || c == '$' || c & 0x80;
}

// Opening delimiter of a quoted token: single quote, double quote or backtick.
bool _isStringStart(char c) nothrow @nogc {
    return c == '\'' || c == '"' || c == '`';
}

// Parses one optional qualifier at the front of `s` and advances `s` past it.
// Returns "-" for a dash, the digit run for a number starting with 1-9, or `null` when
// there is no qualifier (empty input or any other leading character).
// Throws `Exception` if the qualifier starts with '0'.
const(char)[ ] _parseQualifier(ref _ByteString s) {
    // - | [1-9] \d* | (?!0)
    if (s.empty)
        return null;
    switch (s.front) {
        case '-':
            s.popFront();
            return "-";

        case '1': .. case '9':
            const tmp = s.source;
            s._skipWhile!_isDigit();
            // Slice of the original text covering exactly the digits that were consumed.
            return tmp[0 .. $ - s.length];

        case '0':
            throw new Exception("Can't qualify an identifier with 0");

        default:
            return null;
    }
}

// Parses the optional `schema.moduleId|` prefix of a bracketed name and advances `s` past it.
// Either component may come back empty (not specified) or as "-".
// Throws `Exception` when a qualifier is present but not terminated by '|'.
Tuple!(const(char)[ ], q{schema}, const(char)[ ], q{moduleId})
_parseQualifiers(ref _ByteString s) {
    import std.algorithm.searching: skipOver;
    import std.conv: text;
    import std.range.primitives: empty;

    /+
    (?:
        (?&qualifier) \.
    )?
    (?:
        (?&qualifier) \|
      | (?![\d-])
    )
    +/
    const q0 = _parseQualifier(s);
    if (!s.empty)
        switch (s.front) {
            case '|':
                // A single qualifier followed by '|' is the module id; the schema stays unspecified.
                s.popFront();
                return typeof(return)(null, q0);

            case '.':
                s.popFront();
                const q1 = _parseQualifier(s);
                // After `schema.` the module id (together with its '|') may be omitted entirely.
                if (s.skipOver('|') || q1.empty)
                    return typeof(return)(q0, q1);
                throw new Exception(text("Invalid qualifier: expected '|' after '", q0, '.', q1, '\''));

            default:
                break;
        }
    if (q0.empty)
        return typeof(return).init;
    throw new Exception(text("Invalid qualifier: expected '|' after '", q0, '\''));
}

// Unconditionally consumes the first code unit of `_s` (callers have already classified it),
// then keeps consuming while `pred` holds.
// Returns the first code unit that failed `pred`; if the input ran out instead, the returned
// value is not meaningful and callers must check `_s.empty` before using it.
char _skipWhile(alias pred)(ref _ByteString _s) nothrow @nogc {
    auto s = _s;
    scope(success) _s = s;
    char c;
    do
        s.popFront();
    while (!s.empty && pred((c = s.front)));
    return c;
}

alias _skipIdent = _skipWhile!_isIdent;
alias _skipAnyWhitespace = _skipWhile!(c => _isSpace(c) || _isLineBreak(c));

// Copies the body of a bracketed name (`s` positioned just after the qualifiers) into `app`,
// doubling every '"' so that the text can sit inside a double-quoted identifier.
// Returns the input positioned just after the closing ']'.
// Throws (via `enforce`) if the input ends or a line break is met before ']'.
_ByteString _copyBracketedIdent(_ByteString s, Appender!string app) {
    import std.exception: enforce;

    auto lag = s;
    // Copy everything until ']', escaping quotation marks in the process.
    while (true) {
        enforce(!s.empty, "Unclosed square bracket");
        const c = s.front;
        s.popFront();
        if (c == ']')
            break;
        if (c == '"') {
            // Flush up to and including the quote, then emit a second one to escape it.
            app ~= lag.source[0 .. $ - s.length];
            app ~= '"';
            lag = s;
        } else
            enforce(!_isLineBreak(c), "Unclosed square bracket");
    }
    // `- 1` drops the closing ']' that has already been consumed.
    app ~= lag.source[0 .. $ - s.length - 1];
    return s;
}

// Skips a printf-style placeholder at the front of `s`.
// Precondition (guaranteed by the caller): `s` starts with '%' and has at least two code units.
// "%%" is skipped as a unit; if `FormatSpec` rejects the text, only the '%' itself is skipped.
_ByteString _skipPlaceholder(_ByteString s) /+nothrow+/ {
    import std.format: FormatException, FormatSpec;
    import std.range: NullSink, dropOne;

    if (s[1] == '%') // "%%"
        return s[2 .. $];
    auto fmt = FormatSpec!char(s.source);
    NullSink sink;
    try {
        const specFound = fmt.writeUpToNextSpec(sink);
        assert(specFound);
    } catch (FormatException)
        return s.dropOne(); // Skip '%'.
    // catch (Exception e)
    //     assert(false, e.msg);
    // Whatever the format parser left unparsed is the remainder of our input.
    return s[$ - fmt.trailing.length .. $];
}

// Skips a `--` comment. On return `_s` is positioned AT the terminating '\n' (not past it),
// or is empty if the comment runs to the end of the input.
// Returns `true` only when `prepareToStrip` is set, the comment is terminated by a line break
// and contains no '%'; i.e. when the caller may drop the comment text.
bool _skipSingleLineComment(bool prepareToStrip)(ref _ByteString _s) nothrow @nogc {
    auto s = _s;
    scope(success) _s = s;
    static if (prepareToStrip)
        bool allowedToStrip = true;
    for (s = s[2 .. $]; !s.empty; s.popFront()) {
        const c = s.front;
        if (_isLineBreak(c)) {
            static if (prepareToStrip)
                return allowedToStrip;
            else
                return false;
        }
        static if (prepareToStrip)
            if (c == '%')
                allowedToStrip = false;
    }
    return false; // Must not strip a comment on the last line.
}

// Skips a `/* ... */` comment. On return `_s` is positioned just past the closing `*/`,
// or is empty if the comment is unclosed.
// Returns `true` only when `prepareToStrip` is set, the comment is closed and no '%' was seen
// before its closing '/'.
bool _skipMultiLineComment(bool prepareToStrip)(ref _ByteString _s) nothrow @nogc {
    auto s = _s;
    scope(success) _s = s;
    static if (prepareToStrip)
        bool allowedToStrip = true;
    bool prevStar;
    s = s[2 .. $];
    while (!s.empty) {
        const c = s.front;
        s.popFront();
        if (prevStar && c == '/') {
            static if (prepareToStrip)
                return allowedToStrip;
            else
                return false;
        }
        prevStar = c == '*';
        static if (prepareToStrip)
            if (c == '%')
                allowedToStrip = false;
    }
    return false; // Must not strip an unclosed comment.
}

// Rewrites `sql` according to `options`:
// * `[name]`, `[m|name]` and `[s.m|name]` become double-quoted identifiers prefixed with
//   positional `%N$s` placeholders. An unspecified schema uses index `firstAvailableArg`, an
//   unspecified module id uses `firstAvailableArg + 1`, and a "-" qualifier omits that part.
// * With `quoteLowercaseIdents` / `quoteUppercaseIdents`, bare identifiers starting with a
//   letter of that case are wrapped in double quotes; numbers, strings, blob literals and
//   printf placeholders are left alone.
// * With `dedent`, whitespace following a line break (indentation and blank lines) is removed,
//   and leading whitespace of the whole text is collapsed.
// * With `stripComments`, comments that contain no '%' are removed (a `--` comment on the last
//   line and an unclosed `/*` comment are always kept).
//
// Returns the rewritten text plus flags telling whether the default schema / module id
// placeholders were emitted. An empty `sql` yields `typeof(return).init`.
// Throws `Exception` on malformed bracketed names (see `_parseQualifiers`, `_copyBracketedIdent`).
public Tuple!(string, q{sql}, bool, q{usesSchema}, bool, q{usesModuleId})
preprocessSql(SqlPreprocessorOptions options)(const(char)[ ] sql, size_t firstAvailableArg) {
    import std.array: appender;
    import std.conv: toChars;
    import std.range.primitives: empty;

    if (sql.empty)
        return typeof(return).init;
    auto app = appender!string();
    auto s = sql.byCodeUnit();
    auto schemaDefaultIndex = toChars(firstAvailableArg);
    auto moduleIdDefaultIndex = toChars(firstAvailableArg + 1);
    bool usesSchema;
    bool usesModuleId;
    // Invariant of the main loop: `c` mirrors `s.front` (except where explicitly noted).
    char c = s.front;

    static if (options & SqlPreprocessorOptions.quoteLowercaseIdents) {
        static assert(!(options & SqlPreprocessorOptions.quoteUppercaseIdents),
            "Cannot set both `quoteLowercaseIdents` and `quoteUppercaseIdents`",
        );
        enum shouldQuote = true;
        alias isIdentStart = _isLower;
        alias isKeywordStart = _isIdentStartNotLower;
        enum charX = 'x';
    } else static if (options & SqlPreprocessorOptions.quoteUppercaseIdents) {
        enum shouldQuote = true;
        alias isIdentStart = _isUpper;
        alias isKeywordStart = _isIdentStartNotUpper;
        enum charX = 'X';
    } else
        enum shouldQuote = false;
    enum shouldDedent = !!(options & SqlPreprocessorOptions.dedent);
    enum shouldStripComments = !!(options & SqlPreprocessorOptions.stripComments);

    static if (shouldDedent)
        if (_isSpace(c)) {
            const crlf = c == '\r' && s.length >= 2 && s[1] == '\n';
            const nonSpace = s._skipAnyWhitespace();
            // Retain one space at the beginning of the string.
            if (s.empty)
                return typeof(return)(crlf ? "\r\n" : [immutable char(c)], false, false);
            if (crlf)
                app ~= "\r\n";
            else
                app ~= c;
            c = nonSpace;
        }
    // `lag` marks the start of the input that has been scanned but not yet copied to `app`.
    auto lag = s;
mainLoop:
    while (true) {
        assert(!s.empty, "Stepped inside the main parsing loop with an empty string");
        static if (shouldQuote) {
            // Keyword or named parameter.
            if (isKeywordStart(c)) {
                c = s._skipIdent();
                if (s.empty)
                    break mainLoop;
                continue mainLoop;
            }
            // Identifier.
            if (isIdentStart(c)) {
                if (c == charX && s.length >= 2 && (c = s[1]) == '\'') {
                    // Wait, it's a blob string.
                    s.popFront();
                    goto someString;
                }
                // Flush pending text, then emit the identifier wrapped in double quotes.
                app ~= lag.source[0 .. $ - s.length];
                app ~= '"';
                lag = s;
                c = s._skipIdent();
                app ~= lag.source[0 .. $ - s.length];
                app ~= '"';
                lag = s;
                if (s.empty)
                    break mainLoop;
                continue mainLoop;
            }
        }
        // Line break.
        static if (shouldDedent)
            if (_isLineBreak(c)) {
            lineBreak:
                s.popFront();
                if (s.empty)
                    break mainLoop;
                c = s.front;
                if (_isSpace(c) || _isLineBreak(c)) {
                    // The following line is indented.
                    app ~= lag.source[0 .. $ - s.length];
                    c = s._skipAnyWhitespace();
                    lag = s;
                    if (s.empty)
                        break mainLoop;
                }
                continue mainLoop;
            }
        // Qualified name.
        if (c == '[') {
            app ~= lag.source[0 .. $ - s.length];
            s.popFront();

            const q = _parseQualifiers(s);
            if (q.schema != "-") {
                app ~= `"%`;
                if (q.schema.empty) {
                    app ~= schemaDefaultIndex;
                    usesSchema = true;
                } else
                    app ~= q.schema;
                app ~= `$s".`;
            }
            if (q.moduleId != "-") {
                // The module id placeholder is emitted inside the identifier's opening quote.
                app ~= `"%`;
                if (q.moduleId.empty) {
                    app ~= moduleIdDefaultIndex;
                    usesModuleId = true;
                } else
                    app ~= q.moduleId;
                app ~= `$s`;
            } else
                app ~= '"';

            lag = s = _copyBracketedIdent(s, app);
            app ~= '"';
            if (s.empty)
                break mainLoop;
            c = s.front;
            continue mainLoop;
        }
        // Some kind of strings.
        if (_isStringStart(c)) {
        someString:
            const delim = c;
            while (true) {
                s.popFront();
                if (s.empty)
                    break mainLoop;
                c = s.front;
                if (c == delim) {
                    s.popFront();
                    if (s.empty)
                        break mainLoop;
                    c = s.front;
                    // A doubled delimiter is an escaped one and keeps the string open;
                    // anything else means the string has ended.
                    if (c != delim)
                        continue mainLoop;
                }
            }
        }
        static if (shouldQuote) {
            // Number.
            if (_isDigit(c)) {
                // Must parse `1.e2` as a single token.
                c = s._skipWhile!(c => _isIdent(c) || c == '.');
                if (s.empty)
                    break mainLoop;
                continue mainLoop;
            }
            // Printf placeholder (must not quote letters in it).
            if (c == '%' && s.length >= 2) {
                s = s._skipPlaceholder();
                if (s.empty)
                    break mainLoop;
                c = s.front;
                continue mainLoop;
            }
        }
        // Single-line comment.
        if (c == '-' && s.length >= 2 && s[1] == '-') {
            static if (shouldStripComments) {
                const commentStart = s.length;
                if (s._skipSingleLineComment!true()) {
                    app ~= lag.source[0 .. $ - commentStart];
                    lag = s;
                }
            } else
                s._skipSingleLineComment!false();
            // A comment on the last line runs to the end of the input: there is no line break
            // left to process, and stepping further would pop an empty range.
            if (s.empty)
                break mainLoop;
            static if (shouldDedent)
                goto lineBreak; // Careful: we have a wrong value of `c` at the moment.
            else {
                c = '\n';
                continue mainLoop;
            }
        }
        // Multi-line comment.
        if (c == '/' && s.length >= 2 && s[1] == '*') {
            static if (shouldStripComments) {
                const commentStart = s.length;
                if (s._skipMultiLineComment!true()) {
                    app ~= lag.source[0 .. $ - commentStart];
                    app ~= ' '; // Comments can delimit tokens.
                    lag = s;
                }
            } else
                s._skipMultiLineComment!false();
            if (s.empty)
                break mainLoop;
            c = s.front;
            continue mainLoop;
        }
        // Some other character.
        s.popFront();
        if (s.empty)
            break mainLoop;
        c = s.front;
    }
    // Flush whatever has been scanned since the last copy.
    app ~= lag.source;
    return typeof(return)(app.data, usesSchema, usesModuleId);
}